# Document-wide knitr chunk defaults: do not reformat code, do not cache
# chunk results, and use a fixed figure size for every plot.
knitr::opts_chunk$set(
  tidy = FALSE,
  cache = FALSE,
  fig.height = 8,
  fig.width = 11
)
library(tidyverse)
library(data.table)
library(dtplyr)
library(ggfortify)
library(plotly)
# Wrap printed console output at 80 characters.
options(width = 80L)
# Fix the RNG seed so the random draws made later in the script are repeatable.
set.seed(42)
Time series occur in almost any field of study that produces quantitative data. Whenever quantities are measured over time, those measurements form a time-series, or more formally, a discrete-time stochastic process.
One reasonably famous example of a time-series is the count of airline passengers in the US, as seen below. This is a fairly simple time-series, with measurements taken on a monthly basis over a number of years, and with each datum consisting of a single number, i.e. this time-series is univariate.
# Load the built-in airline passenger series and print a compact summary of
# its structure.
data("AirPassengers")
str(AirPassengers)
## Time-Series [1:144] from 1949 to 1961: 112 118 132 129 121 135 148 148 136 119 ...
Having quickly looked at the data, we now plot it.
# Base-graphics plot of the passenger series.
plot(AirPassengers)
The above plot uses base R graphics, which are limited, so we will produce a similar plot using tools built on ggplot2.
We could build this from scratch, but the package ggfortify will help.
# ggplot2-based rendering of the series. The y-axis is extended to include
# zero, and both axis labels are set explicitly.
autoplot(AirPassengers) +
  expand_limits(y = 0) +
  labs(x = "Year", y = "Count of Passengers ('000s)")
# Classical decomposition of the series using the default (additive) model,
# followed by a plot of the decomposition.
ap_decompose <- decompose(x = AirPassengers, type = "additive")
autoplot(ap_decompose)
## Warning: attributes are not identical across measure variables; they will be
## dropped
## Warning: Removed 24 rows containing missing values (geom_path).
Let’s try a multiplicative model.
# Same classical decomposition, but with the multiplicative model.
ap_decompose_mult <- decompose(x = AirPassengers, type = "multiplicative")
autoplot(ap_decompose_mult)
## Warning: attributes are not identical across measure variables; they will be
## dropped
## Warning: Removed 24 rows containing missing values (geom_path).
# STL decomposition with a periodic seasonal window, followed by a plot of
# the result.
ap_decompose_stl <- stl(x = AirPassengers, s.window = "periodic")
autoplot(ap_decompose_stl)
# Raw series.
autoplot(AirPassengers) +
  labs(title = "Plot of the Air Passengers")

# First differences of the series.
autoplot(diff(AirPassengers)) +
  labs(title = "Plot of the Diffs of the Air Passengers")
# Random component of the additive decomposition with the missing values
# removed; as.numeric() strips the time-series attributes so the result is a
# plain numeric vector.
ap_decomp_resid <- as.numeric(na.omit(ap_decompose$random))

# Autocorrelation of the residuals (computed without base plotting, then
# drawn through autoplot).
ap_acf <- acf(ap_decomp_resid, plot = FALSE)
autoplot(ap_acf) +
  labs(title = "Correlogram of the Air Passenger Residuals")

# Partial autocorrelation of the same residuals.
ap_pacf <- pacf(ap_decomp_resid, plot = FALSE)
autoplot(ap_pacf) +
  labs(title = "Partial Correlogram of the Air Passenger Residuals")
# Partial autocorrelation of the first-differenced series.
autoplot(pacf(diff(AirPassengers), plot = FALSE)) +
  labs(title = "Partial Correlogram of the Differenced Air Passenger Data")
# Draw standard-normal innovations and simulate a 100-step MA(1) series
# (coefficient 0.8) from them.
innovations <- rnorm(n = 1000, mean = 0, sd = 1)
ma_1 <- arima.sim(model = list(ma = 0.8), n = 100, innov = innovations)

# First 100 innovations (default colour) with the MA(1) series in red.
output_plot <- ggplot() +
  geom_line(
    aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
    size = 0.5
  ) +
  geom_line(aes(x = seq_along(ma_1), y = as.numeric(ma_1)), colour = "red") +
  labs(x = "Time Step", y = "Value")

ggplotly(output_plot)
# Simulate a 100-step MA(2) series (coefficients 0.4, 0.4) from the same
# innovations.
ma_2 <- arima.sim(model = list(ma = c(0.4, 0.4)), n = 100, innov = innovations)

# First 100 innovations (default colour) with the MA(2) series in red.
output_plot <- ggplot() +
  geom_line(
    aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
    size = 0.5
  ) +
  geom_line(aes(x = seq_along(ma_2), y = as.numeric(ma_2)), colour = "red") +
  labs(x = "Time Step", y = "Value")

ggplotly(output_plot)
# Overlay both MA series on the innovations: MA(1) in red, MA(2) in blue.
output_plot <- ggplot() +
  geom_line(
    aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
    size = 0.5
  ) +
  geom_line(aes(x = seq_along(ma_1), y = as.numeric(ma_1)), colour = "red") +
  geom_line(aes(x = seq_along(ma_2), y = as.numeric(ma_2)), colour = "blue") +
  labs(x = "Time Step", y = "Value")

ggplotly(output_plot)
Now that we have created MA series, we look at what the AR series look like.
# Simulate a 100-step AR(1) series (coefficient 0.8) from the shared
# innovations.
ar_1 <- arima.sim(model = list(ar = 0.8), n = 100, innov = innovations)

# First 100 innovations (default colour) with the AR(1) series in red.
output_plot <- ggplot() +
  geom_line(
    aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
    size = 0.5
  ) +
  geom_line(aes(x = seq_along(ar_1), y = as.numeric(ar_1)), colour = "red") +
  labs(x = "Time Step", y = "Value")

ggplotly(output_plot)
# Simulate a 100-step AR(2) series (coefficients 0.4, 0.4) from the shared
# innovations.
ar_2 <- arima.sim(model = list(ar = c(0.4, 0.4)), n = 100, innov = innovations)

# First 100 innovations (default colour) with the AR(2) series in red.
output_plot <- ggplot() +
  geom_line(
    aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
    size = 0.5
  ) +
  geom_line(aes(x = seq_along(ar_2), y = as.numeric(ar_2)), colour = "red") +
  labs(x = "Time Step", y = "Value")

ggplotly(output_plot)
# Overlay both AR series on the innovations: AR(1) in red, AR(2) in blue.
output_plot <- ggplot() +
  geom_line(
    aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
    size = 0.5
  ) +
  geom_line(aes(x = seq_along(ar_1), y = as.numeric(ar_1)), colour = "red") +
  geom_line(aes(x = seq_along(ar_2), y = as.numeric(ar_2)), colour = "blue") +
  labs(x = "Time Step", y = "Value")

ggplotly(output_plot)
# Simulate a 100-step ARMA(1,1) series (AR 0.4, MA 0.4) from the shared
# innovations.
arma_1_1 <- arima.sim(
  model = list(ar = 0.4, ma = 0.4),
  n = 100,
  innov = innovations
)

# Compare against the earlier simulations: innovations (default colour),
# AR(1) in red, MA(1) in blue and ARMA(1,1) in green.
output_plot <- ggplot() +
  geom_line(aes(x = seq_along(innovations[1:100]), y = innovations[1:100])) +
  geom_line(aes(x = seq_along(ar_1), y = as.numeric(ar_1)), colour = "red") +
  geom_line(aes(x = seq_along(ma_1), y = as.numeric(ma_1)), colour = "blue") +
  geom_line(
    aes(x = seq_along(arma_1_1), y = as.numeric(arma_1_1)),
    colour = "green"
  ) +
  labs(x = "Time Step", y = "Value")

ggplotly(output_plot)